#!/usr/bin/python

import sys
import re
from Bio.Blast import NCBIXML

# Hit definitions are expected to start with "pfam<digits>, <name>, ...".
# Group 1 is the pfam accession and group 2 is the domain name (the second
# comma-separated field, restricted to word characters).
PFAM_HIT_PATTERN = re.compile(r'(pfam\d+), (\w+),.*')

# An HSP counts as significant only if its expect value is strictly below this.
MAX_EXPECT = 0.001


def extract_domains(record, max_expect=MAX_EXPECT):
    """Return the set of domain names with at least one significant HSP.

    Args:
        record: an object exposing ``alignments``; each alignment must expose
            ``hit_def`` (a string) and ``hsps``, and each HSP must expose
            ``expect``. The script passes the record yielded by
            ``NCBIXML.parse``.
        max_expect: exclusive upper bound on ``hsp.expect`` for an HSP to
            count as significant.

    Returns:
        A set of domain-name strings (group 2 of ``PFAM_HIT_PATTERN``), one
        per distinct name among alignments that have a significant HSP and
        whose ``hit_def`` matches the pattern at its start.
    """
    domains = set()
    for alignment in record.alignments:
        # Check significance first so hit_def is only inspected for
        # alignments that actually pass the expect-value filter.
        if not any(hsp.expect < max_expect for hsp in alignment.hsps):
            continue
        hit = PFAM_HIT_PATTERN.match(alignment.hit_def)
        if hit is not None:
            domains.add(hit.group(2))
    return domains


def main(argv):
    """Print "<xml path> <domain>" for each unique significant domain.

    Only the first record of the XML file named by ``argv[1]`` is examined.
    Lines are printed in sorted domain order so output is reproducible.

    Args:
        argv: command-line argument list; ``argv[1]`` is the path to the
            BLAST XML file.
    """
    if len(argv) < 2:
        sys.exit("usage: %s <blast_xml_file>" % argv[0])

    pfam_xml_file = argv[1]
    # The context manager guarantees the file is closed even if parsing fails.
    with open(pfam_xml_file) as handle:
        # next() works on both Python 2 and 3 iterators, unlike .next().
        record = next(NCBIXML.parse(handle))
        domains = extract_domains(record)

    for domain in sorted(domains):
        # A single pre-formatted argument prints identically on Python 2 and 3.
        print("%s %s" % (pfam_xml_file, domain))


if __name__ == "__main__":
    main(sys.argv)